import os
import sys
import time
import urllib

import pyquery
from pyquery import PyQuery as pq

## set encoding
# Python 2 only: make UTF-8 the implicit str <-> unicode codec so the
# non-ASCII keyword and the scraped text can be concatenated and written out.
# reload() is needed because site.py deletes sys.setdefaultencoding during
# interpreter start-up. Python 3 has no reload() builtin and already uses
# UTF-8 as its default encoding, so the hack is skipped there.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf-8')

def getCnUrl(s):
    """Return ``s`` percent-encoded as GBK bytes, ready for a URL query.

    Args:
        s: The text to encode. Text (unicode) strings are used as-is; byte
            strings are taken to be UTF-8 and decoded first, so the result
            does not depend on the interpreter's default encoding.

    Returns:
        The URL-quoted GBK representation of ``s``.

    Raises:
        UnicodeDecodeError: If ``s`` is a byte string that is not valid UTF-8.
        UnicodeEncodeError: If ``s`` contains characters GBK cannot represent.
    """
    # ``quote`` lives in ``urllib.parse`` on Python 3 and directly in
    # ``urllib`` on Python 2; use whichever this interpreter provides.
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    # Decode explicitly instead of relying on an implicit default-encoding
    # conversion when ``.encode`` is called on a byte string.
    if isinstance(s, bytes):
        s = s.decode('utf-8')
    return quote(s.encode('gbk'))

## set key word
# The first command-line argument, when present, is the search keyword;
# otherwise the built-in default is used. Checking ">= 2" (rather than "== 2")
# keeps the keyword when the extra link flag below is also passed.
if len(sys.argv) >= 2:
    s = sys.argv[1]
else:
    s = '采购'

# Passing exactly two arguments (keyword plus any second value) appends each
# result's link to its title.
show_href = len(sys.argv) == 3

## http call
# pyquery downloads and parses the search page for the GBK-quoted keyword.
html = pq(url="http://news.yodao.com/search?q=" + getCnUrl(s) + "&start=0&length=10&s=rank&tl&tr=no_range&keyfrom=search.page")

## extract data
title = []
content = []
href = []

# Every link inside an <h3> yields a title and its target URL.
for node in html('h3 a'):
    anchor = pq(node)  # wrap the raw element once and reuse it
    link = anchor.attr('href')
    singleTitle = anchor.text().replace(' ', '')  # drop spaces from the text
    # An anchor without an href yields None; skip the suffix instead of
    # failing on str + None.
    if show_href and link is not None:
        singleTitle += "  (" + link + ")"
    title.append(singleTitle)
    href.append(link)

# The <p> directly following each <h3> yields the matching summary text.
for node in html('h3').next('p'):
    content.append(pq(node).text().replace(' ', ''))

## save to file
# Create the output directory up front so a fresh checkout does not fail
# after the page has already been fetched.
if not os.path.isdir('out'):
    os.makedirs('out')

# One file per day and keyword: out/<YYYY-MM-DD>_<keyword>.
with open('out/' + time.strftime("%Y-%m-%d", time.localtime()) + "_" + s, 'w') as f:
    for i, singleTitle in enumerate(title):
        # A heading without a following <p> leaves `content` shorter than
        # `title`; emit an empty summary rather than raising IndexError.
        summary = content[i] if i < len(content) else ''
        out = singleTitle + '\n' + summary + '\n'
        f.write(out + "\n")
        print(out)
